set more off
cap log close

*************************
*************************
***creates analysis dataset for first stage***
***Pollution Monitors****
***David Simon******
***July 2, 2017**********
***UPdated by David******
***06 20, 2018********
*************************
*************************


**************Globals********************

	global home 



log using "$output\mkfst_FL_113018.log", replace


set more off
cap log close

*************************
*************************
***creates analysis dataset for first stage***
***Pollution Monitors****
***David Simon******
***July 2, 2017**********
***UPdated by Jenni******
***Sept. 12, 2017********
*************************
*************************

***Work draws from Claudia's Wind/school file and Mike's distance file
***This revision of the first stage,

*****************Globals***********************

	global data 
	global output 
	global samples 
	global roaddata 
	
	
	
log using "$output\mkfst_FL_062018.log", replace

use "$data/wnpFL2010_5mi_lax_dropmonitor.dta", clear  /* we are loading this in just for the pollution monitor locations */

	gen hour=hh(datetime)
		tab hour
	gen date= dofc(datetime)
	gen month=month(date)
	tab month
	


*****************merge on closest road**********;

	sort EPAsite
	tab EPAsite

	merge m:1 EPAsite using "$samples/EPA_USHandIS.dta"
	tab _merge
	drop _merge





***JAH 9/18/17: Replacing windvalue as missing if=0
	tab windvalue1, missing
	replace windvalue1=. if windvalue1==0
	replace windvalue2=. if windvalue2==0
	replace windvalue3=. if windvalue3==0
	tab windvalue1, missing


	
************impute windvalue if missing values and calculate the circlear mean*****************
	gen windvalueimp = windvalue1 
	replace windvalueimp = windvalue2 if windvalueimp==.
	replace windvalueimp = windvalue3 if windvalueimp==.
	label var windvalueimp "imputed windvalue over 3 nearest sites"


	sum windvalue* 



* DES 7/20/17: Make a continuous measure of treatment based on windvalue
* JAH 5/23/18: Do it for each of the 5 matches roads
forv i=1/5 {
	gen windtreat`i'= abs(angle_degrees`i'-windvalueimp)   /*8/7/17: updated to windvalue1 to windvalueimp */
	label var windtreat`i' "Downwind intensity, windvalue, site `i'"
	replace windtreat`i' = 360 - windtreat`i' if windtreat`i'>180 
	sum windtreat`i' angle_degrees`i' windvalue1

* order for ease in spot check
	order  windvalue1 windvalueimp angle_degrees`i' windtreat`i'
* now normalize so that it is between 0 and 1 with 1 treated and 0 not
	sum windtreat`i'
	replace windtreat`i' = 1-(windtreat`i'/180)
	replace windtreat`i'=. if mi_to_nid`i'>1
	

	}
	
	sum windtreat* 


	
	*rename windtreat matched to the closest road to be the same 
	
*********additional data clean up: making month, hour, site-month-hour variable*****************
	egen EPA_closest = group(EPAsite)  /*here we just turn the EPA sites into a numeric variable */
	gen sitemonthhour=EPA_closest*10000+month*100+hour
	gen sitemonth = EPA_closest*100+month
************create down wind variables********************************	
	
***********create down wind variables********************************	
forv i = 1/5 {	
	gen downwind`i'=0 if windvalueimp!=. & angle_degrees`i'!=.
	label var downwind`i' "Downwind of highway, site `i'"
	replace downwind`i'=1 if abs(angle_degrees`i'-windvalueimp)<45   
	replace downwind`i'=1 if abs(angle_degrees`i'-360-windvalueimp)<45&angle_degrees`i'>315&windvalueimp<45  /*DES notes: for example if windvalue1 is zero and the
	bearing angle of the monitor is 315 degrees from the road than abs(315-0) >45 but this is still downwind */
	replace downwind`i'=1 if abs(angle_degrees`i'+360-windvalueimp)<45&angle_degrees`i'<45&windvalueimp>315   /*DES notes: for example if windvalue1 is 316 and angle_degrees
	is 0 than this is downwind.  However, abs(angle_degrees-windvalue1) >45.   So, we need to add 360  */   	
	

	foreach j in 1 4 {
	gen downwind`i'_`j'=0 if windvalueimp~=. & angle_degrees`i'~=.
	label var downwind`i'_`j' "Downwind and within `j' 10ths of mi of highway"
	replace downwind`i'_`j'=1 if downwind`i'==1 & mi_to_nid`i'<=`j'/10

	replace downwind`i'_`j'=. if mi_to_nid`i'>`j'/10
							  }
				  
	}


sum downwind*


	
	foreach j in 1 4 {
	*roadcount
		egen rdcnt_win`j'=rownonmiss(downwind*_`j' )
		replace rdcnt_win`j'=. if mi_to_nid1>`j'/10|windvalueimp==.

		egen dwind_cnt_win`j'=rowtotal(downwind*_`j' )
		replace dwind_cnt_win`j' =. if mi_to_nid1>`j'/10|windvalueimp==.

		egen dwind_any_win`j'=rowmax(downwind*_`j' )			
		replace dwind_any_win`j' =0 if dwind_cnt_win`j'==0
		replace dwind_any_win`j' =. if mi_to_nid1>`j'/10|windvalueimp==.
	}




	*use windintensity of clsoest road

gen dwintensity4h = windtreat1 if mi_to_nid1<=4/10
replace dwintensity4h=. if windtreat1 ==.
	label var dwintensity4h "Mean of intensity, 0.1=10% from upwind 4/10mi"
	
gen dwintensity1h = windtreat1 if mi_to_nid1<=1/10
replace dwintensity1h=. if windtreat1 ==.
	label var dwintensity1h "Mean of intensity, 0.1=10% from upwind 1/10mi"
	
	

	
sum windtreat1 dwind_any_win4 dwintensity4h dwintensity1h dwind_any_win1


drop windtreat*
	
************************ create outcome variables based on pollutant***********************

	gen epaOzone = EPAvalue if pollutant=="Ozone"
	gen epaCO = EPAvalue if pollutant=="CO"
	gen epaNO = EPAvalue if pollutant=="NO"
	gen epaNO2 = EPAvalue if pollutant=="NO2"
	gen epaNOx = EPAvalue if pollutant=="NOx"
	gen epaNOy = EPAvalue if pollutant=="NOy"
	gen epaPM10 = EPAvalue if pollutant=="PM10"
	gen epaPM25 = EPAvalue if pollutant=="PM25"
	gen epaPM25_loc = EPAvalue if pollutant=="epaPM25_loc"
	gen epaSO2 = EPAvalue if pollutant=="SO2"
	

tab mi_to_nid1


tab mi_to_nid1 if epaCO~=.


tab mi_to_nid1 if epaCO~=. & downwind1==1

 
******************** create indicator for school day vs. non school day hours




*now lots of extra variables to drop

drop windsite* circmean* im_circmean* im_circmean_1dayout* im_circmean_3dayout* im_windvalue* im_windvalue_1* dist* lat* lon* theta* degrees* 


* saves an analysis dataset

save "$samples\fst062018.dta", replace	


*now collapse down to make analgous with second stage;

collapse mi_to_nid* AADT* rdcnt_win* dwind_cnt_win* dwind_any_win4 dwintensity4h dwintensity1h dwind_any_win1 epa*, by(EPA_closest)
gen year=2010

save "$samples\fstmean062018.dta", replace	

tab dwind_any_win4  



log close
